In [ ]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
In [1]:
# Load the raw case records from the CSV in the working directory.
# The import cell must have been run first, otherwise `pd` is undefined.
df = pd.read_csv(filepath_or_buffer="covid_19_data.csv")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-3575dc49b97a> in <module>
----> 1 df = pd.read_csv("covid_19_data.csv")

NameError: name 'pd' is not defined
In [16]:
# Preview the first 50 rows of the raw frame.
df.iloc[:50]
Out[16]:
SNo ObservationDate Province/State Country/Region Last Update Confirmed Deaths Recovered
0 1 01/22/2020 Anhui Mainland China 1/22/2020 17:00 1.0 0.0 0.0
1 2 01/22/2020 Beijing Mainland China 1/22/2020 17:00 14.0 0.0 0.0
2 3 01/22/2020 Chongqing Mainland China 1/22/2020 17:00 6.0 0.0 0.0
3 4 01/22/2020 Fujian Mainland China 1/22/2020 17:00 1.0 0.0 0.0
4 5 01/22/2020 Gansu Mainland China 1/22/2020 17:00 0.0 0.0 0.0
5 6 01/22/2020 Guangdong Mainland China 1/22/2020 17:00 26.0 0.0 0.0
6 7 01/22/2020 Guangxi Mainland China 1/22/2020 17:00 2.0 0.0 0.0
7 8 01/22/2020 Guizhou Mainland China 1/22/2020 17:00 1.0 0.0 0.0
8 9 01/22/2020 Hainan Mainland China 1/22/2020 17:00 4.0 0.0 0.0
9 10 01/22/2020 Hebei Mainland China 1/22/2020 17:00 1.0 0.0 0.0
10 11 01/22/2020 Heilongjiang Mainland China 1/22/2020 17:00 0.0 0.0 0.0
11 12 01/22/2020 Henan Mainland China 1/22/2020 17:00 5.0 0.0 0.0
12 13 01/22/2020 Hong Kong Hong Kong 1/22/2020 17:00 0.0 0.0 0.0
13 14 01/22/2020 Hubei Mainland China 1/22/2020 17:00 444.0 17.0 28.0
14 15 01/22/2020 Hunan Mainland China 1/22/2020 17:00 4.0 0.0 0.0
15 16 01/22/2020 Inner Mongolia Mainland China 1/22/2020 17:00 0.0 0.0 0.0
16 17 01/22/2020 Jiangsu Mainland China 1/22/2020 17:00 1.0 0.0 0.0
17 18 01/22/2020 Jiangxi Mainland China 1/22/2020 17:00 2.0 0.0 0.0
18 19 01/22/2020 Jilin Mainland China 1/22/2020 17:00 0.0 0.0 0.0
19 20 01/22/2020 Liaoning Mainland China 1/22/2020 17:00 2.0 0.0 0.0
20 21 01/22/2020 Macau Macau 1/22/2020 17:00 1.0 0.0 0.0
21 22 01/22/2020 Ningxia Mainland China 1/22/2020 17:00 1.0 0.0 0.0
22 23 01/22/2020 Qinghai Mainland China 1/22/2020 17:00 0.0 0.0 0.0
23 24 01/22/2020 Shaanxi Mainland China 1/22/2020 17:00 0.0 0.0 0.0
24 25 01/22/2020 Shandong Mainland China 1/22/2020 17:00 2.0 0.0 0.0
25 26 01/22/2020 Shanghai Mainland China 1/22/2020 17:00 9.0 0.0 0.0
26 27 01/22/2020 Shanxi Mainland China 1/22/2020 17:00 1.0 0.0 0.0
27 28 01/22/2020 Sichuan Mainland China 1/22/2020 17:00 5.0 0.0 0.0
28 29 01/22/2020 Taiwan Taiwan 1/22/2020 17:00 1.0 0.0 0.0
29 30 01/22/2020 Tianjin Mainland China 1/22/2020 17:00 4.0 0.0 0.0
30 31 01/22/2020 Tibet Mainland China 1/22/2020 17:00 0.0 0.0 0.0
31 32 01/22/2020 Washington US 1/22/2020 17:00 1.0 0.0 0.0
32 33 01/22/2020 Xinjiang Mainland China 1/22/2020 17:00 0.0 0.0 0.0
33 34 01/22/2020 Yunnan Mainland China 1/22/2020 17:00 1.0 0.0 0.0
34 35 01/22/2020 Zhejiang Mainland China 1/22/2020 17:00 10.0 0.0 0.0
35 36 01/22/2020 NaN Japan 1/22/2020 17:00 2.0 0.0 0.0
36 37 01/22/2020 NaN Thailand 1/22/2020 17:00 4.0 0.0 2.0
37 38 01/22/2020 NaN South Korea 1/22/2020 17:00 1.0 0.0 0.0
38 39 01/22/2020 Unknown China 1/22/2020 17:00 0.0 0.0 0.0
39 40 01/22/2020 NaN Kiribati 1/22/2020 17:00 0.0 0.0 0.0
40 41 01/23/2020 Anhui Mainland China 1/23/20 17:00 9.0 0.0 0.0
41 42 01/23/2020 Beijing Mainland China 1/23/20 17:00 22.0 0.0 0.0
42 43 01/23/2020 Chongqing Mainland China 1/23/20 17:00 9.0 0.0 0.0
43 44 01/23/2020 Fujian Mainland China 1/23/20 17:00 5.0 0.0 0.0
44 45 01/23/2020 Gansu Mainland China 1/23/20 17:00 2.0 0.0 0.0
45 46 01/23/2020 Guangdong Mainland China 1/23/20 17:00 32.0 0.0 2.0
46 47 01/23/2020 Guangxi Mainland China 1/23/20 17:00 5.0 0.0 0.0
47 48 01/23/2020 Guizhou Mainland China 1/23/20 17:00 3.0 0.0 0.0
48 49 01/23/2020 Hainan Mainland China 1/23/20 17:00 5.0 0.0 0.0
49 50 01/23/2020 Hubei Mainland China 1/23/20 17:00 444.0 17.0 28.0
In [17]:
# Drop the bookkeeping columns and shorten the remaining column names.
# Rebinding `df` (instead of mutating with inplace=True) together with
# errors='ignore' keeps this cell idempotent: re-running it after the columns
# are already gone no longer raises a KeyError.
df = (
    df
    .drop(columns=['SNo', 'Last Update'], errors='ignore')
    .rename(columns={
        'ObservationDate': 'Date',
        'Province/State': 'State',
        'Country/Region': 'Country',
    })
)
In [21]:
# Convert the observation dates from text into pandas datetime values.
df['Date'] = df['Date'].pipe(pd.to_datetime)
In [22]:
# Fill missing values per column type rather than pushing the whole mixed-type
# frame through one imputer. Imputing everything at once forces the frame into
# a single object array, so every column of df2 lost its dtype and a missing
# number would have been replaced by the text placeholder instead of 0.
text_cols = df.select_dtypes(exclude=['number', 'datetime']).columns
numeric_cols = df.select_dtypes(include='number').columns

# Same placeholder the constant strategy uses by default for text data,
# spelled out so the intent is visible.
imputer = SimpleImputer(strategy='constant', fill_value='missing_value')

df2 = df.copy()
if len(text_cols) > 0:  # the imputer rejects an empty column selection
    df2[text_cols] = imputer.fit_transform(df[text_cols])
# Missing counts become 0, matching the constant strategy's numeric default.
df2[numeric_cols] = df[numeric_cols].fillna(0)
In [29]:
# Total the case counts per country and day, collapsing the per-state rows.
# Only the numeric measures are summed. The grouping keys are returned as
# ordinary columns via as_index=False; listing them among the summed columns
# asks pandas to "sum" country names and dates, which newer pandas versions
# reject or which then clashes with the keys when the index is reset.
df3 = (
    df2
    .groupby(['Country', 'Date'], as_index=False)[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
)
In [30]:
# Preview the first 20 rows of the per-country daily totals.
df3.iloc[:20]
Out[30]:
Country Date Confirmed Deaths Recovered
0 Azerbaijan 2020-02-28 1.0 0.0 0.0
1 ('St. Martin',) 2020-03-10 2.0 0.0 0.0
2 Afghanistan 2020-02-24 1.0 0.0 0.0
3 Afghanistan 2020-02-25 1.0 0.0 0.0
4 Afghanistan 2020-02-26 1.0 0.0 0.0
5 Afghanistan 2020-02-27 1.0 0.0 0.0
6 Afghanistan 2020-02-28 1.0 0.0 0.0
7 Afghanistan 2020-02-29 1.0 0.0 0.0
8 Afghanistan 2020-03-01 1.0 0.0 0.0
9 Afghanistan 2020-03-02 1.0 0.0 0.0
10 Afghanistan 2020-03-03 2.0 0.0 0.0
11 Afghanistan 2020-03-04 4.0 0.0 0.0
12 Afghanistan 2020-03-05 4.0 0.0 0.0
13 Afghanistan 2020-03-06 4.0 0.0 0.0
14 Afghanistan 2020-03-07 4.0 0.0 0.0
15 Afghanistan 2020-03-08 5.0 0.0 0.0
16 Afghanistan 2020-03-09 7.0 0.0 0.0
17 Afghanistan 2020-03-10 8.0 0.0 0.0
18 Afghanistan 2020-03-11 11.0 0.0 0.0
19 Afghanistan 2020-03-12 12.0 0.0 0.0
In [31]:
# Distinct country labels, in the order they first appear in df3.
countries = pd.unique(df3['Country'])
# Number of distinct labels.
len(countries)
Out[31]:
229
In [39]:
# One scatter figure per country showing confirmed / recovered / death counts.
# Iterating the groupby visits each country's rows once instead of re-filtering
# the whole frame for every country; sort=False keeps first-appearance order.
for country, country_cases in df3.groupby('Country', sort=False):
    # Use real elapsed days on the x axis. The row position only equals the
    # day offset when a country has no gaps in its reporting dates.
    dates = pd.to_datetime(country_cases['Date'])
    days = (dates - dates.min()).dt.days

    fig, ax = plt.subplots()
    for column, colour in (('Confirmed', 'blue'), ('Recovered', 'green'), ('Deaths', 'red')):
        ax.scatter(days, country_cases[column], color=colour, label=column)
    ax.set_title(country)
    ax.set_xlabel('Days since the first suspect')
    ax.set_ylabel('Number of cases')
    ax.legend()
    plt.show()
    # Release the figure so hundreds of plots do not accumulate in memory.
    plt.close(fig)
In [40]:
# Worldwide totals per day: sum the per-country counts for each date.
# Only the numeric measures are summed. 'Date' is the grouping key and comes
# back as a column via as_index=False; selecting it for summation as well asks
# pandas to "sum" dates, which newer pandas versions reject.
df4 = (
    df3
    .groupby('Date', as_index=False)[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
)
In [41]:
# Scatter plot of the worldwide daily totals.
# Use real elapsed days on the x axis so the positions match the axis label
# even if a date is missing from the data.
world_dates = pd.to_datetime(df4['Date'])
world_days = (world_dates - world_dates.min()).dt.days

fig, ax = plt.subplots()
# Each series is labelled with its column name; order fixes the legend order.
for column, colour in (('Confirmed', 'blue'), ('Recovered', 'green'), ('Deaths', 'red')):
    ax.scatter(world_days, df4[column], color=colour, label=column)
ax.set_title('World')
ax.set_xlabel('Days since the first suspect')
ax.set_ylabel('Number of cases')
ax.legend()
plt.show()
plt.close(fig)
In [ ]: